import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
For demonstration purposes, let us take a 2-dimensional dataset with two features (Feature_1 and Feature_2) consisting of two classes (Class A and Class B) with the following distribution specifications:
Class A: The Class A is centred around the mean of (1,1) and has the covariance matrix [[1,-0.2],[-0.2,1]]
Class B: The Class B is centred around the mean of (3,4) and has the covariance matrix [[1,0.1],[0.1,1]]
Definitions:
Mean: A class with mean (x1, x2) has an average value of x1 along "Feature_1" and an average value of x2 along "Feature_2".
P.S.: Since we would like to ensure that the outputs correspond to the expected outputs, we also set the random seed to 42 before generating these distributions.
# --- Synthetic two-class dataset -------------------------------------------
# Distribution parameters for the two Gaussian clusters.
mean_01 = np.array([1, 1])
cov_01 = np.array([[1, -0.2], [-0.2, 1]])
mean_02 = np.array([3, 4])
cov_02 = np.array([[1, 0.1], [0.1, 1]])

# Seed the global NumPy RNG so both draws below are reproducible.
np.random.seed(42)
# The draw order (first cluster, then second) determines the sampled values.
data_01 = np.random.multivariate_normal(
    mean=mean_01, cov=cov_01, size=500, check_valid="warn"
)
data_02 = np.random.multivariate_normal(
    mean=mean_02, cov=cov_02, size=500, check_valid="warn"
)

# Stack row-wise: rows 0-499 come from the first cluster, 500-999 from the second.
data = np.concatenate((data_01, data_02), axis=0)
df_train = pd.DataFrame(data, columns=["Feature_1", "Feature_2"])
# Label 0 for the first 500 rows, label 1 for the remaining 500.
df_train["class"] = [label for label in (0, 1) for _ in range(500)]
# --- Scatter plot of the raw dataset ----------------------------------------
# Both traces are kept as module-level names because later figures reuse them.

# Rows 0-499 of df_train, drawn as light-blue circles.
scatter_trace_1 = go.Scatter(
    x=df_train["Feature_1"].iloc[:500],
    y=df_train["Feature_2"].iloc[:500],
    mode="markers",
    name="Class A",
    hovertemplate="Feature_1: %{x}<br>Feature_2: %{y}",
    marker={
        "size": 9,
        "opacity": 0.8,
        "color": "lightblue",
        "line": {"color": "blue", "width": 1},
    },
)

# Rows 500 onwards of df_train, drawn as orange triangles.
scatter_trace_2 = go.Scatter(
    x=df_train["Feature_1"].iloc[500:],
    y=df_train["Feature_2"].iloc[500:],
    mode="markers",
    name="Class B",
    hovertemplate="Feature_1: %{x}<br>Feature_2: %{y}",
    marker={
        "symbol": "star-triangle-up",
        "size": 10,
        "opacity": 0.65,
        "color": "darkorange",
        "line": {"color": "red", "width": 1},
    },
)

fig = go.Figure(
    data=[scatter_trace_1, scatter_trace_2],
    layout={
        "width": 800,
        "height": 800,
        "title_text": "Visualization of the dataset",
        "xaxis": {"title": {"text": "Feature_1"}},
        "yaxis": {"title": {"text": "Feature_2"}},
    },
)
fig.show()
# fig.write_html(r".\expected outputs\expectedoutput1.html")
# --- Design matrix and train/test split --------------------------------------
X = df_train[["Feature_1", "Feature_2"]]
Y = df_train[["class"]]  # kept as a one-column frame so labels have shape (m, 1)

# Prepend a column of ones so theta[0] acts as the intercept term.
# The row count follows df_train instead of a hard-coded 1000, so the block
# keeps working if the dataset size changes.
X = np.hstack((np.ones((len(df_train), 1)), X.to_numpy()))

# 80/20 split; random_state pins the shuffle for reproducible results.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y.to_numpy(), test_size=0.2, random_state=42
)
print(X_train.shape,Y_train.shape,X_test.shape,Y_test.shape)
def hypothesis(x, theta):
    """Return the logistic (sigmoid) of the linear score ``np.dot(x, theta)``.

    Args:
        x: Feature vector (or a 2-D matrix of row vectors) including the
            bias entry.
        theta: Parameter vector compatible with ``x`` under ``np.dot``.

    Returns:
        ``1 / (1 + exp(-x . theta))`` with the shape produced by ``np.dot``.
    """
    linear_score = np.dot(x, theta)
    return 1.0 / (1.0 + np.exp(-linear_score))
def error(X, Y, theta):
    """Return the mean binary cross-entropy (in bits) of the logistic model.

    Computes ``-(1/m) * sum_i [y_i*log2(h_i) + (1-y_i)*log2(1-h_i)]`` where
    ``h_i = sigmoid(X[i] . theta)``.

    The log-probabilities are evaluated with ``np.logaddexp`` instead of
    ``log2(sigmoid(z))``: once the sigmoid saturates to exactly 0.0 or 1.0 the
    direct form yields ``log2(0) = -inf`` and ``0 * -inf = nan``, whereas
    ``log(h) = -logaddexp(0, -z)`` and ``log(1-h) = -logaddexp(0, z)`` stay
    finite for any score ``z``.

    Note: the loss uses base-2 logarithms, while ``gradient`` is the gradient
    of the natural-log loss; the two differ only by the constant factor
    ``1/ln(2)``, which does not change the minimiser.

    Args:
        X: 2-D array of samples, one row per sample (bias column included).
        Y: Labels indexed per sample as ``Y[i]``; with an ``(m, 1)`` array the
            result has shape ``(1,)``, with a 1-D array it is a scalar.
        theta: Parameter vector.

    Returns:
        The averaged negative log2-likelihood, shaped like ``Y[i]``.
    """
    m = X.shape[0]
    inv_ln2 = 1.0 / np.log(2.0)  # converts natural log to log base 2
    total = 0
    for i in range(m):
        z = np.dot(X[i], theta)
        log2_h = -np.logaddexp(0.0, -z) * inv_ln2      # log2(sigmoid(z))
        log2_not_h = -np.logaddexp(0.0, z) * inv_ln2   # log2(1 - sigmoid(z))
        total += Y[i] * log2_h + (1 - Y[i]) * log2_not_h
    total /= m
    return -total
def gradient(X, Y, theta):
    """Return the gradient of the mean negative log-likelihood w.r.t. ``theta``.

    Computes ``(1/m) * sum_i (h_i - y_i) * X[i]`` with
    ``h_i = hypothesis(X[i], theta)``.

    The original element-wise accumulation assigned a one-element array
    (``hx - Y[i]`` when ``Y`` has shape ``(m, 1)``) into a scalar slot of
    ``grad``, which relies on an implicit array-to-scalar conversion that
    NumPy has deprecated. The vectorised form below avoids that conversion
    and the double Python loop.

    Args:
        X: 2-D array of samples, shape ``(m, n)`` (bias column included).
        Y: Labels, shape ``(m,)`` or ``(m, 1)``; flattened before use.
        theta: Parameter vector of length ``n``.

    Returns:
        1-D array of length ``n`` holding the averaged gradient.
    """
    m = X.shape[0]
    # Flatten both sides so an (m, 1) label column cannot broadcast the
    # residual into an (m, m) matrix.
    residual = np.ravel(hypothesis(X, theta)) - np.ravel(Y)
    return np.dot(residual, X) / m
def gradient_ascent(X, Y, learning_rate=0.5, n_iterations=100):
    """Fit logistic-regression parameters by batch gradient steps.

    Despite the name, each step moves *against* the gradient of the negative
    log-likelihood (``theta -= learning_rate * grad``), which is equivalent
    to ascending the log-likelihood.

    The metrics are recorded *before* each update, so the last entries of
    ``error_list``, ``acc_list`` and ``theta_list`` describe the parameters
    prior to the final step, while the returned ``theta`` includes it.

    Args:
        X: 2-D array of training samples (bias column included).
        Y: Training labels, as accepted by ``gradient``, ``error`` and
            ``accuracy``.
        learning_rate: Step size applied to the gradient.
        n_iterations: Number of update steps; defaults to the previously
            hard-coded value of 100.

    Returns:
        Tuple ``(theta, theta_list, error_list, acc_list, probabilty_list)``:
        the final parameters, the per-iteration parameter snapshots, losses
        and accuracies, and the result of ``predic_proba(X, theta)`` for the
        final parameters.
    """
    # Random start drawn from the global NumPy RNG; the intercept starts at 0.
    theta = 2 * np.random.random(X.shape[1])
    theta[0] = 0

    error_list = []
    acc_list = []
    theta_list = []
    for _ in range(n_iterations):
        grad = gradient(X, Y, theta)
        error_list.append(error(X, Y, theta))
        acc_list.append(accuracy(X, Y, theta))
        # Snapshot a copy: theta is updated in place right below.
        theta_list.append(theta.copy())
        theta -= learning_rate * grad

    probabilty_list = predic_proba(X, theta)
    return (theta, theta_list, error_list, acc_list, probabilty_list)
def predict(x, theta):
    """Return the hard class label (0 or 1) for a single sample.

    The sample is assigned to class 0 when ``hypothesis(x, theta)`` is
    strictly below 0.5 and to class 1 otherwise.
    """
    probability = hypothesis(x, theta)
    return 0 if probability < 0.5 else 1
def predic_proba(x, theta):
    """Return the predicted class-1 probability for every row of ``x``.

    The previous implementation ignored its ``x`` argument and iterated over
    the module-level ``X`` instead, so the result described whatever data
    happened to be bound to that global rather than the samples passed in.

    Args:
        x: 2-D array-like of samples, one row per sample (bias column
            included).
        theta: Parameter vector.

    Returns:
        List with one ``hypothesis(row, theta)`` value per row of ``x``.
    """
    return [hypothesis(sample, theta) for sample in np.asarray(x)]
def accuracy(X, Y, theta):
    """Return the fraction of samples in ``X`` whose prediction matches ``Y``.

    Labels and predictions are compared as flat vectors. The previous version
    reshaped only the predictions to a column, so 1-D labels were broadcast
    against it into an ``(m, m)`` comparison and the result was silently
    wrong; ``(m, 1)`` labels give the same value as before.

    Args:
        X: 2-D array of samples, one row per sample (bias column included).
        Y: True labels, shape ``(m,)`` or ``(m, 1)``.
        theta: Parameter vector.

    Returns:
        Number of correct predictions divided by ``X.shape[0]``.
    """
    y_pred = np.array([predict(sample, theta) for sample in X])
    labels = np.ravel(Y)
    return (labels == y_pred).sum() / X.shape[0]
# --- Train the model, then plot the loss curve -------------------------------
theta, theta_list, error_list, acc_list, probabilty_list = gradient_ascent(
    X_train, Y_train
)

fig = go.Figure(
    layout={
        "width": 800,
        "height": 800,
        "title_text": "Visualising the error",
        "xaxis": {"title": {"text": "Iteration"}},
        "yaxis": {"title": {"text": "Error (Negative of maximum likelihood)"}},
    }
)
fig.add_trace(
    go.Scatter(
        # Iterations are numbered 1..100 on the x axis.
        x=list(range(1, 101)),
        # Each recorded error is a one-element array; take its single value.
        y=[err[0] for err in error_list],
        mode="lines+markers",
        name="",
        marker={
            "color": "lightblue",
            "line": {"color": "blue", "width": 1},
        },
        hovertemplate="Iteration: %{x}<br>Error: %{y}",
    )
)
fig.show()
# fig.write_html(r".\expected outputs\expectedoutput2.html")
# --- Plot the accuracy curve and report the final numbers --------------------
fig = go.Figure(
    layout={
        "width": 800,
        "height": 800,
        "title_text": "Visualising the Accuracy",
        "xaxis": {"title": {"text": "Iteration"}},
        "yaxis": {"title": {"text": "Accuracy"}},
    }
)
fig.add_trace(
    go.Scatter(
        # Iterations are numbered 1..100 on the x axis.
        x=list(range(1, 101)),
        y=acc_list,
        mode="lines+markers",
        name="",
        marker={
            "color": "lightblue",
            "line": {"color": "blue", "width": 1},
        },
        hovertemplate="Iteration: %{x}<br>Accuracy: %{y}",
    )
)
fig.show()
# fig.write_html(r".\expected outputs\expectedoutput3.html")

# Last recorded accuracy and the parameters returned by training.
print("The accuracy for the algorithm is:",acc_list[-1])
print("The final theta parameters calculated are:",theta)
# --- Animated convergence of the decision boundary ---------------------------
def _decision_boundary_trace(params, x_range=(-3, 7)):
    """Return the line trace of the boundary theta0 + theta1*x1 + theta2*x2 = 0.

    The y values are solved from the *same* x values that are plotted. The
    previous code plotted at x = linspace(-3, 7) but evaluated the line at
    linspace(-4, 8), which drew a line with the wrong slope and offset.
    """
    boundary_x = np.linspace(x_range[0], x_range[1], 2)
    boundary_y = -1 * (params[0] + boundary_x * params[1]) / params[2]
    return go.Scatter(
        x=boundary_x,
        y=boundary_y,
        mode="lines",
        name="Decision Boundry",
        hoverinfo="none",
    )


sliders_dict = {
    'active': 0,
    'yanchor': 'top',
    'xanchor': 'left',
    'currentvalue': {
        'font': {'size': 20},
        'prefix': 'No. of iterations:',
        'visible': True,
        'xanchor': 'right'
    },
    'transition': {'duration': 300, 'easing': 'linear'},
    'pad': {'b': 10, 't': 50},
    'len': 0.9,
    'x': 0.1,
    'y': 0,
    'steps': []
}

# One frame per recorded parameter snapshot; frames are named "1", "2", ...
frames = [
    go.Frame(
        data=[scatter_trace_1, scatter_trace_2, _decision_boundary_trace(params)],
        name=str(step),
    )
    for step, params in enumerate(theta_list, start=1)
]

# One slider step per frame, each jumping straight to that frame.
for step in range(1, len(frames) + 1):
    sliders_dict['steps'].append({
        'args': [
            [step], {
                'frame': {'duration': 300, 'redraw': True},
                'mode': 'immediate',
                'transition': {'duration': 300}
            }],
        'label': step,
        'method': 'animate'})

fig = go.Figure(
    # The initial view shows the boundary for the first recorded parameters.
    data=[scatter_trace_1, scatter_trace_2, _decision_boundary_trace(theta_list[0])],
    layout=go.Layout(updatemenus=[{
        'buttons': [{
            "args": [None, {"fromcurrent": True,
                            "transition": {"duration": 50,
                                           "easing": "linear"}}],
            'label': 'Play',
            'method': 'animate'
        },
        {
            'args': [[None], {'frame': {'duration': 0, 'redraw': False},
                              'mode': 'immediate',
                              'transition': {'duration': 0}}],
            'label': 'Pause',
            'method': 'animate'
        }],
        'direction': 'left',
        'pad': {'r': 10, 't': 87},
        'showactive': False,
        'type': 'buttons',
        'x': 0.1,
        'xanchor': 'right',
        'y': 0,
        'yanchor': 'top'
    }]),
    frames=frames
)
fig.update_layout(
    width=800,
    height=800,
    title_text="Visualising the convergence of decision boundary",
    xaxis=dict(
        range=[-3.5, 7.5],
        title=dict(text="Feature_1")
    ),
    yaxis=dict(
        title=dict(text="Feature_2")
    )
)
fig['layout']['sliders'] = [sliders_dict]
fig.show()
# fig.write_html(r".\expected outputs\expectedoutput4.html")
# --- Final decision boundary over the dataset --------------------------------
fig = go.Figure(
    layout=dict(
        width=800,
        height=800,
        title_text="Visualization of the decision boundary",
        xaxis=dict(
            range=[-4, 8],
            title=dict(text="Feature_1")
        ),
        yaxis=dict(
            title=dict(text="Feature_2")
        )
    )
)
fig.add_trace(scatter_trace_1)
fig.add_trace(scatter_trace_2)

# Boundary: theta[0] + theta[1]*Feature_1 + theta[2]*Feature_2 = 0, solved for
# Feature_2. The y values must be computed from the same x values that are
# plotted; previously x was linspace(-3, 7) while y was evaluated at
# linspace(-4, 8), which drew a line that was not the model's boundary.
boundary_x = np.linspace(-3, 7, 2)
fig.add_trace(go.Scatter(
    x=boundary_x,
    y=-1 * (theta[0] + boundary_x * theta[1]) / theta[2],
    mode="lines",
    name="Decision Boundry",
    hoverinfo="none"
))
fig.show()
# fig.write_html(r".\expected outputs\expectedoutput5.html")
plt.show()